Load the data set from Herbst et al. (2021).

load("multiomics_MAE.RData")
prot <- multiomics_MAE[["proteomics"]]
dim(prot)
## [1] 7311   68
vsn::meanSdPlot(prot)

1 Correlation analysis

Run the correlation analysis:

  • Pearson correlation and
  • Spearman correlation.

Only calculate the values for correlation with TBX21.

cor_pearson <- lapply(seq_len(nrow(prot)), function(i) {
    psych::corr.test(x = prot["TBX21", ], y = prot[i, ], method = "pearson", 
        adjust = "BH")
})
cor_spearman <- lapply(seq_len(nrow(prot)), function(i) {
    psych::corr.test(x = prot["TBX21", ], y = prot[i, ], method = "spearman", 
        adjust = "BH")
})
## create vectors with coefficient values, p-values, adjusted p-values
cor_pearson_coef <- unlist(lapply(cor_pearson, function(i) i[["r"]]))
names(cor_pearson_coef) <- rownames(prot)
cor_pearson_p <- unlist(lapply(cor_pearson, function(i) i[["p"]]))
names(cor_pearson_p) <- rownames(prot)
cor_pearson_padj <- unlist(lapply(cor_pearson, function(i) i[["p.adj"]]))
names(cor_pearson_padj) <- rownames(prot)

cor_spearman_coef <- unlist(lapply(cor_spearman, function(i) i[["r"]]))
names(cor_spearman_coef) <- rownames(prot)
cor_spearman_p <- unlist(lapply(cor_spearman, function(i) i[["p"]]))
names(cor_spearman_p) <- rownames(prot)
cor_spearman_padj <- unlist(lapply(cor_spearman, function(i) i[["p.adj"]]))
names(cor_spearman_padj) <- rownames(prot)

## bind to a data.frame
df_corr <- data.frame(
    protein = names(cor_pearson_coef),
    pearson_coef = cor_pearson_coef,
    pearson_p = cor_pearson_p,
    pearson_padj = cor_pearson_padj,
    spearman_coef = cor_spearman_coef,
    spearman_p = cor_spearman_p,
    spearman_padj = cor_spearman_padj 
)

## show the correlating features and write the values to a file
rmarkdown::paged_table(df_corr[order(abs(df_corr$pearson_coef), decreasing=TRUE), ])
write.table(df_corr, file = "correlation_coefficient_pvalue_TB21.txt",
    sep = "\t", quote = FALSE, row.names = FALSE)

1.1 Co-expression with RUNX3 and IRF9

It was mentioned by Philipp, that there is a potential regulation towards RUNX3 and IRF9. What are the correlation coefficients with those?

2 Pathway analysis

According to https://www.genome.jp/entry/hsa:30009, TBX21 is located in the following pathway maps:

  • hsa04658: Th1 and Th2 cell differentiation
  • hsa04659: Th17 cell differentiation
  • hsa05321: Inflammatory bowel disease

Visualize these pathways together with their correlation coefficients.

coef <- matrix(c(df_corr$pearson_coef, df_corr$spearman_coef), 
    byrow = TRUE, nrow = 2)
colnames(coef) <- df_corr$protein
rownames(coef) <- c("pearson", "spearman")
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04658",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_pathway")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04659",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_pathway")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa05321",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_pathway")
## [1] "Note: 177 of 7311 unique input IDs unmapped."

3 Pathway analysis of correlated proteins

Do some hacking here and assign the Pearson correlation coefficients to proteome slot and Spearman correlation coefficients to transcriptome slot to jointly analyze these two coefficients for the pathway analysis.

set.seed(2022)
databases <- c("kegg")
layers <- c("proteome", "transcriptome")
pathways <- getMultiOmicsFeatures(dbs = databases, layer = layers, 
    returnProteome = "SYMBOL", useLocal = FALSE)

## requires logFC, pValue, adj.pValue
omics_data <- initOmicsDataStructure(layer = layers)

## set the p-values that are small (0) to a small values
df_corr$pearson_p <- ifelse(df_corr$pearson_p < 1e-60, 1e-60, df_corr$pearson_p)
df_corr$spearman_p <- ifelse(df_corr$spearman_p < 1e-60, 1e-60, df_corr$pearson_p)

## add 1st layer
omics_data$proteome <- rankFeatures(logFC = df_corr$pearson_coef, 
    pvalues = df_corr$pearson_p)
names(omics_data$proteome) <- df_corr$protein

## add 2nd layer
omics_data$transcriptome <- rankFeatures(logFC = df_corr$spearman_coef, 
    pvalues = df_corr$spearman_p)
names(omics_data$transcriptome) <- df_corr$protein

## Run the pathway enrichment
# use the multiGSEA function to calculate the enrichment scores
# for all omics layer at once.
enrichment_scores <- multiGSEA(pathways, omics_data)

## Calculate the aggregated p-values
df <- extractPvalues(enrichmentScores = enrichment_scores,
    pathwayNames = enrichment_scores$transcriptome$pathway)
df <- data.frame(pathway = enrichment_scores$transcriptome$pathway, df)
df$pathway <- factor(df$pathway, levels = df$pathway)
df$ES_transcriptome <- enrichment_scores$transcriptome$ES
df$NES_transcriptome <- enrichment_scores$transcriptome$NES
df$ES_proteome <- enrichment_scores$proteome$ES
df$NES_proteome <- enrichment_scores$proteome$NES
df$combined_pval <- combinePvalues(df)
df$combined_padj <- p.adjust(df$combined_pval, method = "BH")

## filter the data.frame and order
df_cut <- df[!is.na(df[, "proteome_padj"]) & !is.na(df[, "transcriptome_padj"]), ]
df_cut <- df_cut[order(df_cut[, "combined_pval"]), ]
df_cut$log10combp <- -log10(df_cut$combined_pval)
df_cut$pathway <- factor(df_cut$pathway, levels = df_cut$pathway)

rmarkdown::paged_table(df_cut)
## write the object to a file
write.table(df_cut, file = "correlation_TBX21_multiGSEA_pathways.txt",
    dec = ".", sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)

## print the top 10 pathways with lowest combined p value
ggplot(df_cut[1:10,], aes(x = pathway, y = log10combp)) + 
  geom_bar(stat = "identity") + coord_flip() + ylab("-log10(p-value)")

## pathway visualization
coef <- matrix(c(df_corr$pearson_coef, df_corr$spearman_coef), 
    byrow = TRUE, nrow = 2)
colnames(coef) <- df_corr$protein
rownames(coef) <- c("pearson", "spearman")

## Th1 and Th2 cell differentiation - hsa04658
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04658",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## Inflammatory bowel disease - hsa05321
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa05321",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## Phosphatidylinositol signaling system - hsa04070
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04070",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## C-type lectin receptor signaling pathway - hsa04625
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04625",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## Th17 cell differentiation - hsa04659
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04659",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## TNF signaling pathway - hsa04668
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04668",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## Prolactin signaling pathway - hsa04917
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04917",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## "Amyotrophic lateral sclerosis" - hsa05014
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa05014",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## Pathways of neurodegeneration - multiple diseases - hsa05022
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa05022",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."
## "Coronavirus disease - COVID-19" - hsa05171
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa05171",
    same.layer = TRUE, species = "hsa", out.suffix = "TBX_correlated")
## [1] "Note: 177 of 7311 unique input IDs unmapped."

4 Differential expression

Split the data set up in 0-25%-quantile group and 75-100% quantile group and perform differential abundance analysis between these two groups.

qu <- quantile(prot["TBX21", ])
qu_025 <- prot["TBX21", ] < qu["25%"]
qu_2550 <- prot["TBX21", ] >= qu["25%"] & prot["TBX21", ] < qu["75%"]
qu_75100 <- prot["TBX21", ] >= qu["75%"]

tbx21_groups <- character(length(prot["TBX21", ]))
tbx21_groups <- ifelse(qu_025, "lower", tbx21_groups)
tbx21_groups <- ifelse(qu_2550, "middle", tbx21_groups)
tbx21_groups <- ifelse(qu_75100, "upper", tbx21_groups)

## how does this grouping agree with ASB-CLL?
pg <- read.table("PGs.txt", sep = "\t", header = TRUE)
pg$PG_ASB_CLL <- ifelse(pg$PG == "ASB-CLL", "ASB-CLL", "other")
all(names(tbx21_groups) == pg$patient_ID_CLL)
## [1] TRUE
table(tbx21_groups, pg$PG)
##             
## tbx21_groups ASB-CLL M-PG TP53-PG Tris12M-PG Tris12U-PG U-PG
##       lower        5    2       3          2          1    4
##       middle       5   10       1          4          2   12
##       upper        2    6       0          3          5    1
table(tbx21_groups, pg$PG_ASB_CLL)
##             
## tbx21_groups ASB-CLL other
##       lower        5    12
##       middle       5    29
##       upper        2    15

Perform now the differential abundance analysis between qu_025 group and qu_75100 group. The ratios were previously log2 transformed - transform them by taking 2^(log2(ratio)).

all(colnames(prot) == pg$patient_ID_CLL)
## [1] TRUE
cD <- data.frame(sample = colnames(prot), qu = as.character(tbx21_groups))
design <- model.matrix(~ 0 + qu, data = cD)
colnames(design) <- make.names(colnames(design))
fit <- lmFit(object = 2^prot, design = design, method = "ls")

## create contrasts
contrasts <- makeContrasts(
    lower_vs_upper = qulower - quupper,
    levels = design)
fit_c <- contrasts.fit(fit, contrasts)
fit_eB <- eBayes(fit_c)

## set parameters for differential expression
num <- Inf
p_val <- 1
adj <- "BH"

Return the log FC and p-values for differentially abundant proteins.

tT <- topTable(fit_eB, number = num, p.value = p_val, adjust.method = adj,
    coef = "lower_vs_upper")
rmarkdown::paged_table(tT)
## show for RUNX3 and IRF9
rmarkdown::paged_table(tT[c("RUNX3", "IRF9"), ])

5 Package version

Print here the package versions used in this analysis.

sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Germany.utf8  LC_CTYPE=English_Germany.utf8   
## [3] LC_MONETARY=English_Germany.utf8 LC_NUMERIC=C                    
## [5] LC_TIME=English_Germany.utf8    
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] MultiAssayExperiment_1.24.0 limma_3.54.1               
##  [3] multiGSEA_1.8.0             MatrixQCvis_1.7.3          
##  [5] shiny_1.7.4                 plotly_4.10.1              
##  [7] SummarizedExperiment_1.28.0 Biobase_2.58.0             
##  [9] GenomicRanges_1.50.2        GenomeInfoDb_1.34.7        
## [11] IRanges_2.32.0              S4Vectors_0.36.1           
## [13] BiocGenerics_0.44.0         MatrixGenerics_1.10.0      
## [15] matrixStats_0.63.0          pathview_1.38.0            
## [17] forcats_1.0.0               stringr_1.5.0              
## [19] dplyr_1.1.0                 purrr_1.0.1                
## [21] readr_2.1.3                 tidyr_1.3.0                
## [23] tibble_3.1.8                tidyverse_1.3.2            
## [25] ggplot2_3.4.0               knitr_1.42                 
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3         bit64_4.0.5            multcomp_1.4-20       
##   [4] DelayedArray_0.24.0    data.table_1.14.6      rpart_4.1.19          
##   [7] KEGGREST_1.38.0        RCurl_1.98-1.10        doParallel_1.0.17     
##  [10] generics_0.1.3         metap_1.8              snow_0.4-4            
##  [13] preprocessCore_1.60.2  cowplot_1.1.1          TH.data_1.1-1         
##  [16] RSQLite_2.2.20         bit_4.0.5              tzdb_0.3.0            
##  [19] mutoss_0.1-12          xml2_1.3.3             lubridate_1.9.1       
##  [22] httpuv_1.6.8           assertthat_0.2.1       gargle_1.3.0          
##  [25] xfun_0.37              hms_1.1.2              jquerylib_0.1.4       
##  [28] evaluate_0.20          promises_1.2.0.1       fansi_1.0.4           
##  [31] dbplyr_2.3.0           readxl_1.4.1           Rgraphviz_2.42.0      
##  [34] DBI_1.1.3              htmlwidgets_1.6.1      googledrive_2.0.0     
##  [37] ellipsis_0.3.2         RSpectra_0.16-1        crosstalk_1.2.0       
##  [40] backports_1.4.1        deldir_1.0-6           vctrs_0.5.2           
##  [43] imputeLCMD_2.1         cachem_1.0.6           withr_2.5.0           
##  [46] checkmate_2.1.0        mnormt_2.1.1           cluster_2.1.4         
##  [49] lazyeval_0.2.2         crayon_1.5.2           pkgconfig_2.0.3       
##  [52] labeling_0.4.2         qqconf_1.3.1           nlme_3.1-160          
##  [55] nnet_7.3-18            rlang_1.0.6            lifecycle_1.0.3       
##  [58] sandwich_3.0-2         affyio_1.68.0          mathjaxr_1.6-0        
##  [61] modelr_0.1.10          cellranger_1.1.0       graph_1.76.0          
##  [64] Matrix_1.5-3           zoo_1.8-11             reprex_2.0.2          
##  [67] base64enc_0.1-3        GlobalOptions_0.1.2    googlesheets4_1.0.1   
##  [70] png_0.1-8              viridisLite_0.4.1      rjson_0.2.21          
##  [73] bitops_1.0-7           shinydashboard_0.7.2   Biostrings_2.66.0     
##  [76] blob_1.2.3             shape_1.4.6            jpeg_0.1-10           
##  [79] tmvtnorm_1.5           scales_1.2.1           memoise_2.0.1         
##  [82] graphite_1.44.0        magrittr_2.0.3         plyr_1.8.8            
##  [85] hexbin_1.28.2          zlibbioc_1.44.0        compiler_4.2.2        
##  [88] RColorBrewer_1.1-3     plotrix_3.8-2          pcaMethods_1.90.0     
##  [91] clue_0.3-64            KEGGgraph_1.58.3       cli_3.6.0             
##  [94] affy_1.76.0            XVector_0.38.0         htmlTable_2.4.1       
##  [97] Formula_1.2-4          MASS_7.3-58.1          tidyselect_1.2.0      
## [100] vsn_3.66.0             stringi_1.7.12         highr_0.10            
## [103] yaml_2.3.7             proDA_1.12.0           askpass_1.1           
## [106] norm_1.0-10.0          latticeExtra_0.6-30    grid_4.2.2            
## [109] sass_0.4.5             fastmatch_1.1-3        tools_4.2.2           
## [112] timechange_0.2.0       parallel_4.2.2         circlize_0.4.15       
## [115] rstudioapi_0.14        foreach_1.5.2          foreign_0.8-83        
## [118] gridExtra_2.3          farver_2.1.1           Rtsne_0.16            
## [121] digest_0.6.31          BiocManager_1.30.19    Rcpp_1.0.10           
## [124] broom_1.0.3            shinyhelper_0.3.2      later_1.3.0           
## [127] org.Hs.eg.db_3.16.0    httr_1.4.4             AnnotationDbi_1.60.0  
## [130] ComplexHeatmap_2.14.0  psych_2.2.9            Rdpack_2.4            
## [133] colorspace_2.1-0       rvest_1.0.3            XML_3.99-0.13         
## [136] fs_1.6.0               reticulate_1.28        umap_0.2.9.0          
## [139] splines_4.2.2          sn_2.1.0               multtest_2.54.0       
## [142] gmm_1.7                xtable_1.8-4           jsonlite_1.8.4        
## [145] UpSetR_1.4.0           R6_2.5.1               TFisher_0.2.0         
## [148] Hmisc_4.7-2            pillar_1.8.1           htmltools_0.5.4       
## [151] mime_0.12              glue_1.6.2             fastmap_1.1.0         
## [154] BiocParallel_1.32.5    codetools_0.2-18       fgsea_1.24.0          
## [157] mvtnorm_1.1-3          utf8_1.2.3             lattice_0.20-45       
## [160] bslib_0.4.2            numDeriv_2016.8-1.1    curl_5.0.0            
## [163] shinyjs_2.1.0          openssl_2.0.5          interp_1.1-3          
## [166] survival_3.4-0         rmarkdown_2.20         munsell_0.5.0         
## [169] GetoptLong_1.0.5       GenomeInfoDbData_1.2.9 iterators_1.0.14      
## [172] impute_1.72.3          haven_2.5.1            gtable_0.3.1          
## [175] rbibutils_2.2.13

  1. European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany↩︎